{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "afa56065",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Grid World Environment:\n",
      "Shape: (3, 4)\n",
      "Actions: [0, 1, 2, 3]\n",
      "Start state: (2, 0)\n",
      "Goal state: (0, 3)\n",
      "Wall state: (1, 1)\n",
      "\n",
      "Running Monte Carlo First Visit...\n",
      "Episode 1000/5000 completed\n",
      "Episode 2000/5000 completed\n",
      "Episode 3000/5000 completed\n",
      "Episode 4000/5000 completed\n",
      "Episode 5000/5000 completed\n",
      "\n",
      "Value Function:\n",
      "State (0, 0): 0.111\n",
      "State (0, 1): 0.238\n",
      "State (0, 2): 0.350\n",
      "State (1, 0): -0.024\n",
      "State (1, 2): -0.462\n",
      "State (2, 0): -0.168\n",
      "State (2, 1): -0.324\n",
      "State (2, 2): -0.525\n",
      "State (2, 3): -0.774\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAApIAAAIQCAYAAAAy8I61AAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjMsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvZiW1igAAAAlwSFlzAAAPYQAAD2EBqD+naQAAPORJREFUeJzt3QmcTfX/x/GPfR1j3/elsWVfkn3n72drIcnebkmKUv0sUfwkUYQWVIgiRCK7FNkVRci+jRlmmLEM4/4fn2/d250N8zX33llez8fjNO455957zsy3e9/nu51UDofDIQAAAEA8pY7vEwAAAABFkAQAAIAVgiQAAACsECQBAABghSAJAAAAKwRJAAAAWCFIAgAAwApBEgAAAFYIkgAAALBCkARSgJ49e0rx4sXvuN/Ro0clVapUMmvWLEnsGjVqZJY7Wb9+vTkn/elNSel3mRj46u8E4N4QJIFE7MiRI9KvXz+57777JHPmzGYpX7689O3bV3799VefHNPWrVvNF/57770XY1v79u3NtpkzZ8bY1qBBAylUqJAkRu3atTO/28uXL8e5T9euXSV9+vQSHBwsid2IESPM3yG2Zdq0aT49tg8//JBwDSQjaX19AABit2zZMuncubOkTZvWhJjKlStL6tSpZf/+/fLNN9/I1KlTTdAsVqzYHV/r448/llu3biXIcVWrVs2Erk2bNsmLL74YZdvPP/9sjvenn36SXr16udZHRETItm3bpG3btpIY6e936dKlsmjRIunevXuM7VeuXJElS5ZIq1atJFeuXJJUaBnJmjVrlHW1a9cWXwfJ3Llzm1ry6BcaV69eNWEdQNJBkAQSocOHD8tjjz1mQuKaNWukQIECUbb/73//M1/IGixvJzw8XLJkySLp0qVLsGPToKhhRMOiuwMHDkhQUJA8/vjjJmS627Fjh1y7dk3q1at3z++voU6DbELXSPr5+cncuXNjDZIaIvV3qYEzKXnkkUdMaEsKtCxnzJjR14cBIJ5o2gYSoXHjxpngok3E0UOkM8wNGDBAihQp4lqnNTxa+6Qh9P/+7/9MMHIGn9j6SIaEhJj1/v7+kj17dunRo4dZdzc0EJ47d04OHTrkWqfBMlu2bPL000+7QqX7NufznDQIV6hQQTJkyCAFCxY0zfXR31/7QFasWNEEUa2x0gD52muvxXlcJ0+elA4dOpjwnDdvXlNjev369TueT6ZMmeShhx4yoT0wMDDGdg2Y+vvUwHnhwgV5+eWX5f777ze/bz3n1q1by549e6z7dcb299Ea5IkTJ5rfkQasfPnyyTPPPCMXL14UT/bf1PXaNB69mVz/1nqcWla0zGiNs4b66GbPni21atUyf6scOXKYv9sPP/xgtuk57tu3TzZs2OBqanf+PuLqI/n1119L9erVzd9IQ/ETTzwhp06dirKPs+zrev3767/z5Mlj/k6RkZH3/PsCEDeCJJBIm7VLly4d72bImzdvSsuWLU2IGj9+vDz88MOx7udwOEx/xi+++MJ8MY8ePdqEMA2Td8MZCN1rHjUsPvDAA+aYtQZUm7ndt2kQ0+Z5ZzjR4KgB8t133zXHOX36dGnRooXcuHEjyntpn0QNalWqVDHBqnHjxrEekzaLNm3aVFauXGn6lb7++uvy448/ypAhQ+7qnDR06+/vq6++irJeg6O+ZseOHU2Y+euvv2Tx4sXyn//8RyZMmCCDBw+W3377TRo2bCinT5+WhKKhUV+7bt26MmnSJBPc5syZY/6+0X9HcdFj10DvXO4lhHbq1Mn0IR0zZoz5t4bQkSNHRtlHH3fr1s38/d98803zWC921q5da7br369w4cJStmxZU/Z00b9TXPQ99L3SpElj3vepp54y3Tq0/EW/6NDAqL8b7XqgZV//Hlq2PvroI+tzBnAXHAASldDQUIf+r9mhQ4cY2y5evOg4f/68a7ly5YprW48ePczzXn311RjP023FihVzPV68eLHZd9y4ca51N2/edNSvX9+snzlz5m2P8dKlS440adI4+vTp41oXEBDgGDlypPl3rVq1HIMHD3Zty5Mnj6N58+bm34GBgY706dM7WrRo4YiMjHTtM3nyZPPeM2bMcK1r2LChWTdt2rQYx6DbdHGaOHGi2ferr75yrQsPD3eULl3arF+3bt1tz0nPv0CBAo46depEWa/vrc9fuXKleXzt2rUox62OHDniyJAhg+PNN9+Msi767zL6Mcf19/nxxx/Nc+fMmRNlvxUrVsS6Prrhw4eb/aIvzveI7dicdL0+P/pr9e7dO8p+HTt2dOTKlcv1+ODBg47UqVOb9dF/P7du3XL9u0KFCrH+DvTv4/53ioiIcOTNm9dRsWJFx9WrV137LVu2zOw3bNiwKL8/Xef++1dVq1Z1VK9e/ba/KwD3hhpJIJG5dOmS+Rl9kITSZkBtsnMuU6ZMibHPc889d8f3WL58uWked99Xa3369+9/V8eotYuVKlVy1UhqbZc2Zz/44IPmsdaiOZuz//zzTzl//ryrFnP16tVm8M3AgQOj9PHU2iZtJv7uu++ivJc2fbsP3LndOWk3AO0X6KTNq9rUfjf0/LVf6ubNm03Tr3uztjYra22n83icx621YFpjqn+rgIAA2blzpyQEbc7V5uPmzZtHqVHUJl59r3Xr1t3V6yxcuFBWrVrlWrRG09azzz4b5XH9+vXNuTvLq9bSanP8sGHDYvTd1Sbr+Nq+fbvpZvD8889H6TvZpk0bU6MZvZzEdYxagwzAcwiSQCKjIU2FhYXF2KbNvxoItB9abDQcatPhnRw7dsyEruhhVcPQ3dJg6OwLqc3YGsS0aVtpoNR+jdo/MXr/SH3v2N5LR+uWLFnStd1Jpwy6m5G8+jztDhA9tMTnnJx9SjU8Km3u1+ZxDZh6fkrDkk59VKZMGRMqtd+ehnqdjik0NFQSwsGDB81raRcF9wsHXbRcxNaPMzbaP7FZs2auRQO+raJFi0Z5rP0flbO5XPvmaoDU6akSQlzlRGmQjF5ONGzq7yf6MSZEn1IAcWPUNpDIaE2Uhry9e/fG2ObsM+leY+bOvbbM0zQYfvDBByYoapB0Dj5xBkkNkTrlj9ZaasB1hsz40n6J3qI1fhpSvvzySzOoR39qa6/7aO23335b/vvf/0rv3r1l1KhRkjNnTvM71xrWO02xpCH379bjqKIPCNHX0RAZVw1i9MAUX3HVEN5uYIozSEcX2/n4QlzHB8CzCJJAIqTNd5988omZ/FtHwCY057RCWrvlXiupNYx3y33AjTYHu9d26SAafQ8NmbpUrVrVNWWPc95LfS+tgXTS5m6dF1NrzmzPScO3Bhv3oBSfc1IaGjUoag2j1kxqzWPNmjVd2xcsWGAG/Hz66adRnqeDP+401Y7WkMXW1Bq9dq1UqVKmC4D+Tj0RpJ21idEHrEQ/jvjQY9YA/Pvvv5uBUXG522Zu93LSpEmTKNt03d3MnwrA82jaBhIhHWmswUtrvXSanYSuBdLpgXSEsk5Y7V4bpTWMd0vDYokSJUwg1f5szv6RTvpY+83pl777tD8aFLWp+v33349yHhrMtDlXQ7TtOemoaQ16Tjo9TXxH7TprH7Wv3+7du2PMHak1X9F//9qnMfqUNHGFLZ1QXvuMOum0QdHn5NSRyvr30BrP6PTvdrfTNMVF+6Jq6N24cWOU9Tolky2ddkdrZnW0dvSaWfffl07NdDfHX6NGDVMrq3ficZ/C6fvvv5c//vjDupwASFjUSAKJkNaCaW1Yly5dTB8x551t9AtZa+10m35p301/yNjoHWa0tuvVV181zeTar02nVYlvHz8NiDqFi4re/06DpDYNO/dzb5YdOnSomRpG7xSjczNq2NQQozV/Oh2RDR2sM3nyZDOhuPbP1O4Bemzxnbxcw7Eeu05CrqIHSZ32R8OSDgDS/XTqH22Cdq9djYteGOiUQTpNTZ8+fUxfRw1KOlekc9CK0qlrdPofnfJGw6xOi6RT6mjfSQ2tOh2Q+6AiG08++aSMHTvW/NTQpqFSB0bZ0v6pOpWPhl8d5KLzcmpXC+3eoBcdei7O7gN6AaNTTulzNCxGr3FUer468b7+nvX3of8v6EWVnrvORxn9rkoAfOQeR30D8KBDhw45nnvuOTOFTcaMGR2ZMmVylC1b1vHss886du/eHWVfnQIlS5Yssb5O9OllVHBwsKNbt26ObNmyOfz9/c2/d+3adVfT/zhNnz7d7F+oUKEY23bu3OmadubcuXMxtut0P3ou6dKlc+TLl8+cp05v5E6nidHpYmIT21Q6x44dc7Rr186ROXNmR+7cuR0vvPCCa8qcO03/427KlCnmOTqNUXQ6/c9LL71kpgrSv0fdunUdmzdvjnE8cU2xM3v2bEfJkiXNFEhVqlQx0wrF9vdRH330kZm+Rt/Hz8/Pcf/99zuGDBniOH369G2P3zllj04RFRedOkqnb9K/vb52p06dzNRMcU3/E/219Lx0vZ6nO52+Safd0emQcuTIYX4nq1atcm0/e/aso02bNuY99fnO31n06X+c5s+f73q9nDlzOrp27eo4efLkXZV957ED8JxU+h9fhVgAAAAkXfSRBAAAgBWCJAAAAKwQJAEAAOD5IKkj7fS2aDp1hC516tQxUzEAAAAg5YnXYJulS5eaOdR0ahJ92meffSbvvPOO7Nq1y0xfAQAAgJTjnkdt6+3BNEzqnGgAAABIOawnJNe7LujEuOHh4aaJOy56RwL3uxLoHQ8uXLgguXLluutbZQEAAMBztF7x8uXL5gYCesMLjwVJvYuDBsdr166Ze/QuWrTI3BUjLno3A72DBQAAABK3EydOxOuuafFu2o6IiJDjx4+bW6npPW0/+eQT2bBhQ5xhMnqNpD6vaNGi5jZXOnAH8IT169ebWwuGhYWZPr2AJ1DO4M1ytiJoheQokcPXh4Nk6tzv52TeK/MkJCRE/P39PVcjmT59enN/VOc9U/U+qhoKp0+fHuv+eq9VXaLTENmoUaP4vj1wV06dOmXusaxf7nofYcATKGfwZjnLXzG/FKxS0NeHg2QuVTy7Hd7zPJLa59G9xhEAAAApQ7xqJIcOHSqtW7c2TdPaIXPu3Lmmyn3lypWeO0IAAAAk/SAZGBgo3bt3lzNnzpj2c22e1hDZvHlzzx0hAAAAkn6Q/PTTTz13JAAAAEhSuNc2AAAArBAkAQAAYIUgCQAAACsESQAAAFghSAIAAMAKQRIAAABWCJIAAACwQpAEAACAFYIkAAAArBAkAQAAYIUgCQAAACsESQAAAFghSAIAAMAKQRIAAABWCJIAAACwQpAEAACAFYIkAAAArBAkAQAAYIUgCQAAACsESQAAAFghSAIAAMAKQRIAAABWCJIAAACwQpAEAACAFYIkAAAArBAkAQAAYIUgCQAAACsESQAAAFghSAIAAMAKQRIAAABWCJIAAACwQpAEAACAFYIkAAAArBAkAQAAYIUgCQAAACsESQAAAFghSAIAAMAKQRIAAABWCJIAAACwQpAEAACAFYIkAAAArBAkAQAAYIUgCQAAACsESQAAAFghSAIAAMAKQRIAAABWCJIAAACwQpAEAACAFYIkAAAArBAkAQAAYIUgCQAAACsESQAAAFghSAIAAMAKQRIAAABWCJIAAACwQpAEAACAFYIkAAAArBAkAQAAYIUgCQAAACsESQAAAFhJa/c0AAAAz4u8ESmrJqySnd/slNSpU0ua9GkkR5Ec0uqVVlL4/sJy9sBZ+XbYt3Lu4Dmzf7778kn7N9ubn04Oh0NGVR0luYrnkr6L+7rWBx8PlncavCNjj471ybklBwRJAACQaM3tN1euh1+XgSsHSubsmc26A+sPSODBQPHL7SeT206Wjm93lOqPVDfbdizcIZPbTZbBGwZLtnzZzLo/N/wpmfwzyel9pyX4WLDkKpbLp+eUnNC0DQAAEqXzh8/Lb9/9Jl0+6OIKkSqgUYBUe6iabJqxSUrXLe0Kkar6w9WlVJ1SsunTTa51W2ZvkTrd65htv8z+xevnkZwRJAEAQKJ08teTkrtEbsmSI0vs2/eclOI1i8dYr+t0mwq/GC771+yXao9Ukwe6PSBbv9wqt27d8vixpxQESR+YMmWKFC9eXDJmzCi1a9eWrVu3xrnvvn375OGHHzb7p0qVSiZOnBhjn40bN0rbtm2lYMGCZp/Fixd7+AyQ3MrZxx9/LPXr15ccOXKYpVmzZrfd/9lnn42zPCJliU85++abb6RGjRqSPXt2yZIli1SpUkW++OKLKPv07NnTlC33pVWrVl44EyQFQUeCZFyDcfJWrbdkbt+5d/WcHV/vkHLNyklm/8xSsEJB8cvrZ4IlEgZB0svmz58vgwYNkuHDh8vOnTulcuXK0rJlSwkMDIx1/ytXrkjJkiVl7Nixkj9//lj3CQ8PN6+jH+iATTlbv369dOnSRdatWyebN2+WIkWKSIsWLeTUqVMx9l20aJFs2bLFXLggZYtvOcuZM6e8/vrrpoz9+uuv0qtXL7OsXLkyyn4aHM+cOeNavvzySy+dERKbwpUKm/B4JeSKeay1k0M2DpFmA5vJldArUrhyYTm67WiM5+k6fa6zWfvgjwdlZOWRZtE+kjRvJxyCpJdNmDBBnnrqKfPhWb58eZk2bZpkzpxZZsyYEev+NWvWlHfeeUcee+wxyZAhQ6z7tG7dWkaPHi0dO3b08NEjuZazOXPmyPPPP29qiMqWLSuffPKJafpZs2ZNlP00WPbv39/sny5dOi+dDZJLOWvUqJH5nCpXrpyUKlVKXnjhBalUqZJs2vRvXzaln3V64exctJYcKVOeUnmkYuuKMm/APBMcnSKuRJifdXvVlYObDsqOBTtc23SwzaGfDkndPnXlxO4TEh4cLiN/HynD9ww3yxs735D96/ZLWFCYT84puWHUthdFRETIjh07ZOjQoa51OpWBNiPqFTqQWMqZ1oTfuHHD1CA5abDs1q2bDB48WCpUqOCRY0fKKWc6HcvatWvlwIED8r///S9GDXnevHlNgGzSpIm5UM6Vi1G2KdXjUx6XVe+ukveavydp0qSRTNkzSdbcWaXpgKaSvWB26fdtP1kybIksf2u5SCqRvKXzSr+l/cQ/v7/8MP4HqdqxqimbTtrErYN1ts3fJpXaVpLrl6/L8ArDXduzF8ouL/7woo/ONukhSHpRUFCQREZGSr58/85tpfTx/v3010DiKWevvPKKabrWUOCkX/Zp06aVAQMGJPgxI+WUs9DQUClUqJBcv37dhIIPP/xQmjdvHqVZ+6GHHpISJUrI4cOH5bXXXjOtLhpOdX+kPGnTp5XWQ1ubJTYFyhWQZ79+NtZtj45/NNb1vT/v7fr3e8HvJdCRpkwESQBRaH/cefPmmVohHUChtOZp0qRJph+cDn4AbPn5+cnu3bslLCzMdJ3QPpbaD1ybvZV243G6//77TdO3NoNreWzatKkPjxxAbOgj6UW5c+c2V9Tnzv09+76TPo5rIA3gzXI2fvx4EyR/+OEH8wXu9OOPP5oBFEWLFjW1krocO3ZMXnrpJTNiFymPbTnTJsbSpUub/rhafh555BEZM2ZMnPtryNT3OnToUIIeP4CEQZD0ovTp00v16tWjDGBwDmioU6eOT48NyYdtORs3bpyMGjVKVqxYYaZocad9I3WUrdYkORdt+tb+ktFH3CJlSKjPM32ONnPH5eTJkxIcHCwFChS452NG8pigfGLLifJWzbfk3abvypk/zsS635YvtsjoGqNlVLVRMu+FeeY2i3ezDfFH07aXaTNOjx49zBd1rVq1zDx8On2PjnpU3bt3N/2HnFfo2qH9999/d/1bR83ql3jWrFnNVb3SJiL3q/UjR46YfXSghNYgIeWJbznT/o/Dhg2TuXPnmhrGs2fPmvVaznTRgQ7RBzvoqG2teQoICPDBGSIpljP9qftqU7WGx+XLl5t5JKdOner6LBs5cqSZO1fLlvaRHDJkiPms02mFgK8GfSV1etSR2o/Xlt1LdpvbJ7605qUo++j0PsvHLJeX171s5oz8pOsn8vNnP0v9J+vfdhvsECS9rHPnznL+/Hnzpa1f1tq8ozVAzg7rx48fjzK67PTp01K1atUoTY+6NGzY0PQZUtu3b5fGjRtH+XBX+gE/a9YsL54dkmo50y9yvVDRZkZ3Oj/giBEjvH78SJ7lTEOmTjOltYyZMmUyU03Nnj3bvI7SpnKt+f7ss88kJCTE1HrrfKZaUx7X9GdIOS6fvyzHdx2XZxf+PbCmcrvKsvCVhXL+r/OSp2Qe1357luyRiq0quu6zrVMErZqwyoTF222DHYKkD/Tr188ssXGGQyetHdJpMm5HO6nfaR+kPPEpZ0ePxpzQ905snoOUXc50Gh9d4qLhkq4SiEvIqRDJlj+bpEn79+h9HfiXo3AOuXjyYpQgefHURclR5N+5R3MWyWn2udM22KGPJAAAAKwQJAEAQKKnE4VfOntJIm/+PThGW+K0NlFrJd3lKJRDLp74t5bxwokLrn1utw12CJIAACDR88vjZ+6tvf2r7ebxnm/3mDvbuDdrq0rtKsneFXvl0rlLJmz+NPMnqfZQtTtugx36SAIAgCSh04ROMrfvXFn93mrJ6JdRukzuYtbrvbj1nty65C6eW1q/0lomtZ5ktpWuW1oe7Pmg+ffttsEOQRIAACQJ+crki/U+2I+9/+8dkZROEaRLbG63DfFH0zYAAACsECQBAABghSAJAAAAKwRJAAAAWCFIAgAAwApBEgAAAFYIkgAAALBCkAQAAIAVgiQAAACsECQBAABghSAJAAAAKwRJAAAAWCFIAgAAwApBEgAAAFYIkgAAALBCkAQAAIAVgiQAAACsECQBAABghSAJAAAAKwRJAAAAWCFIAgAAwApBEgAAAFYIkgAAALBCkAQAAIAVgiQAAACsECQBAABghSAJAAAAKwRJAAAAWCFIAgAAwApBEgAAAFYIkgAAALBCkAQAAIAVgiQAAACsECQBAABghSAJAAAAKwRJAAAAWCFIAgAAwApBEgAAAFYIkgAAALBCkAQAAIAVgiQAAACsECQBAABghSAJAAAAKwRJAAAAWCFIAgAAwApBEgAAAFYIkgAAALBCkAQAAIAVgiQAAACsECQBAABghSAJAAAAKwRJAAAAWCFIAgAAwApBEgAAAFYIkgAAALBCkAQAAIAVgiQAAACsECQBAABghSAJAAAAKwRJAAAAWCFIAgAAwApBEgAAAFYIkgAAAPB8kBwzZozUrFlT/Pz8JG/evNKhQwc5cOCA3TsDAAAg5QTJDRs2SN++fWXLli2yatUquXHjhrRo0ULCw8M9d4QAAABIlNLGZ+cVK1ZEeTxr1ixTM7ljxw5p0KBBQh8bAAAAkkuQjC40NNT8zJkzZ5z7XL9+3SxOly5dMj/Xr18vp06dupe3B+K0bds2ady4sZw9e1b27t3r68NBMqWfbVmzZpXVq1fTzQce/zw7vOGwBB4K9PXhIJm68NcFq+elcjgcDpsn3rp1S9q1aychISGyadOmOPcbMWKEjBw5Msb6uXPnSubMmW3eGgAAAAnoypUr8vjjj5tKwmzZsnm+RlL7SmpNz+1CpBo6dKgMGjQoSo1kkSJFJCwsTMqUKWP79sBtaU1kZGSkfPvtt7Jv3z5fHw6SqUKFCkn37t3N51lAQICvDwfJlNZ4lytXToK+/lpK+fv7+nCQTJ0ICrJ6nlWQ7Nevnyxbtkw2btwohQsXvu2+GTJkMEt0GiJr1Khh8/bAHelFzuHDh02I/OWXX3x9OEimKlWqZH5qiOTzDJ7i7DahIbJq3ry+PhwkU+EREZ4PktoK3r9/f1m0aJHp41iiRAmrNwUAAEDSlza+zdnat3HJkiVmLkltPlT+/v6SKVMmTx0jAAAAkvo8klOnTjWdMBs1aiQFChRwLfPnz/fcEQIAACBRinfTNgAAAKC41zYAAACsECQBAABghSAJAAAAKwRJAAAAWCFIAgAAwApBEgAAAFYIkgAAALBCkAQAAIAVgiQAAACsECQBAABghSAJAAAAKwRJAAAAWCFIAgAAwApBEgAAAFYIkgAAALBCkAQAAIAVgiQAAACsECQBAABghSAJAAAAKwRJAAAAWCFIAgAAwApBEgAAAFYIkgAAALBCkAQAAIAVgiQAAACsECQBAABghSAJAAAAKwRJAAAAWCFIAgAAwApBEgAAAFYIkgAAALBCkAQAAIAVgiQAAACsECQBAABghSAJAAAAKwRJAAAAWCFIAgAAwApBEgAAAFYIkgAAALBCkAQAAIAVgiQAAACsECQBAABghSAJAAAAKwRJAAAAWCFIAgAAwApBEgAAAFYIkgAAALBCkAQAAIAVgiQAAACsECQBAABghSAJAAAAKwRJAAAAWCFIAgAAwApBEgAAAFYIkgAAALBCkAQAAIAVgiQAAACsECQBAABghSAJAAAAKwRJAAAAWElr9zQAAADPqjd1qvl5IzJSDgYHS/m8ec3jMrlzy4hmzaTq+++bdY5/9h/WtKm0vO8+1/MvX78uZcePl44VK8rk9u1d6388ckQenTNHSufKJbccDsmWIYOMb9NGKubP7+UzTPoIkgAAIFHa9Nxz5uexixel/rRprsfOdVnTp3etW3HggDy5YIEcffVVSZP67wbXRXv3SuWCBWXpH3/I2FatJGuGDK7na4h0Pnfyzz9L3yVLZMMzz3j5DJM+mrYBAECS17BkSbkcESEXr151rfti1y4ZWLeuPFismHyzb1+cz21WurQcCgry0pEmLwRJAACQ5C35/XdpUKKE5M6SxTzeHxgop0JDpWnp0tKtalWZvXNnnM9d+E/NJeKPIOkDDodDhg0bJgUKFJBMmTJJs2bN5ODBg3d83pQpU6R48eKSMWNGqV27tmzdutW17cKFC9K/f38JCAgwr1m0aFEZMGCAhIaGevhsAKRktp9nTmPHjpVUqVLJwIEDY2zbvHmzNGnSRLJkySLZsmWTBg0ayFW32iYgLCLC9KOsOGGCvLh0qfy3SZMotZGPVa5smrlblCkjx0JC5MD5867th4KDzXN1ORgUJFM7dPDRWSRtBEkfGDdunLz//vsybdo0+eWXX8yHZMuWLeXatWtxPmf+/PkyaNAgGT58uOzcuVMqV65snhMYGGi2nz592izjx4+XvXv3yqxZs2TFihXSp08fL54ZEgP9Uv/4449dj+vWrWu+7Bs2bOhaN3XqVHnzzTfNv7Ws6AVH5syZo7zOkSNHTDmLbubMmfLCCy949ByQvD/PnLZt2ybTp0+XSpUqxRoiW7VqJS1atDAXzbpvv379JPU/fd8A5ewj+duLL8rAevWk94IFcu3GDTM4Z/6ePfLlnj1y/3vvmUE5V2/ckC/caiWdfSR1mdWpkxTLkcOn55JU8X+kl+kX+sSJE+WNN96Q9u3bmw/Qzz//3ITAxYsXx/m8CRMmyFNPPSW9evWS8uXLmw9t/eKfMWOG2V6xYkVZuHChtG3bVkqVKmWu4t966y1ZunSp3Lx504tnCF9bt26dNGrUyPW4cePGsmXLlhjr1q5dK35+fqbM7NmzRx599FEfHTFS2ueZCgsLk65du5qLnhyxfIG/+OKLplXl1VdflQoVKpjWlk6dOkkGt8ESgJPWag9p2FByZc4sn27fLssPHJDiOXLIHy+9ZEKmLquefFLm//qrCZlIOARJL9NanrNnz5rmHyd/f3/TVK1X4LGJiIiQHTt2RHmOXpXr47ieo7SWSZuD0qZlcH5KoqGxYMGCUqhQIfNYA6TWPjqDZP78+U3XBy07Xbp0kdWrV5sLFWqv4Y3PM6e+fftKmzZtojzXSVtatHYzb9688uCDD0q+fPlMjfqmTZs8ch5IPmFydMuWMmnTJvlk61Z5NFpNd0CePFLAz0++P3DAZ8eYHBEkvUw/dJV+MLrTx85t0QUFBUlkZGS8nzNq1Ch5+umnE+zYkTTcuHFDfv75Z1PrmD59eilRooR8//33UrhwYVObo+v1S/769esmPGqt9rJly6RMmTJyn9v8a4AnPs/UvHnzTBedMWPGxLr9r7/+Mj9HjBhhWmK0m061atWkadOm8ep/ieRDm52PDx16x3X1iheXPwcPlqU9e8oztWvHeJ2Nzz4r7cqXl/olSkSZSgj2CJIeNmfOHMmaNatr0S95T7t06ZK50tcmcP0gRspt3nYflKU1lXXq1DHrdbt2h9ABEj/88IPp/jB79mzp3bu3rw8dyfzz7MSJE6aPrb6WDhyMza1bt8zPZ555xnTnqVq1qrz33numedvZnQdA4kCbp4e1a9fOfJk7aS2QOnfunPkSd9LHVapUifU1cufOLWnSpDH7uNPH2kzp7vLly6aDuvZ9W7RokaRLly6BzwhJgQZFrW08fvy4rF+/3qzbsGGDqY3UpWfPnma7lhNn7Y+WFe0y8frrr5sacMATn2faTUebrrWG0UnL28aNG2Xy5MnmNZ2vpRfD7sqVK2fKNIDEgyDpYfpFrYt753QNf2vWrHF90GoNovYHei6OanZtnqxevbp5Tod/pifQK3Z9rKMYnfR1dLSkNl9+++23cV7tI/nTEa7av0wHM+iXvzNIahO2fklv375dlixZIg888IAccOsvpLWWWput5QfwxOeZNk//9ttvUdZprWPZsmXllVdeMRfNOs2Z9vN1L5vqzz//lNatW3vk3JA0HA4OlucWLZLgK1ckW8aM8mGHDlLun9smuvt8506ZuGmTuf2hzi35bps2ki5NmjtuQ/zRtO1lzvnSRo8ebb6s9QO1e/fu5kPTGRKdH7Z6de6kU//o6MbPPvtM/vjjD/MhHR4ebj6AnR/eOk2Grvv000/NY+2jpAu1SymPNlXrwAT90nd+GWvfMn2s63WE7bFjx2J8UWtzo/ugm5UrV5qmSOfiHMCjXSbc1z/yyCNePkMk1c8zLYParcJ90SmDcuXKZf7tfN3BgwebaYUWLFgghw4dkv/+97+yf/9+BoWlcAOXLpUe1avLjgEDzB1rno9ldoCjFy/K22vXyve9esmuAQMkMCxMZu3YccdtsEONpA8MGTLEBD4dCBMSEiL16tUzncndaxAPHz5sBsw4de7cWc6fP2/mCNRwqFf/+hxnJ3ftuK61AKp06dIxRlbqFT5SlthqbvQL3unrr7+Osf2DDz4wi9JBOrHRixfnBQxg83l2NzSg6lyUOg2Q3nBB5zRdtWqVmd4MKdP5sDDZffq0LOrWzTzWQTODly+Xv4KDpWSuXK79vv39d2kdECD5/qk9712jhkz48Ud5qlat226DHYKkD+jVtk7H4pwQOjZHjx6NsU6bsd2bst3pAAptZgKApPB55s7Zjzc6nUNSF0CdunTJBMC0/zRDa9kr7O8vJ0JDowRJfVwke3bX46LZs8vJf+7ydrttsEPTNgAAAKwQJAEAQKJXKFs2OXf5stz8p9+/tsJpbWIRf/8o++njEyEhrsfHQ0JMzeWdtsEOQRIAACR6ebJmlUoFCpjbHCrt71gwW7YozdqqXbly5u41Gjo1bM7Yvl0e+mcg1+22wQ59JAEAQJIwsW1bM1JbB8j4ZcggU/6ZHaD/kiVmEM3/lS0rxXPmlKGNG0vLfyav17vd9KpRw/z7dttghyAJAACShDK5c8uqJ5+Msf6D9u2jPNYpgnSJze22If5o2gYAAIAVgiQAAACsECQBAABghSAJAAAAKwRJAAAAWCFIAgAAwApBEgAAAFYIkgAAALBCkAQAAIAVgiQAAACsECQBAABghSAJAAAAKwRJAAAAWCFIAgAAwApBEgAAAFYIkgAAALBCkAQAAIAVgiQAAACsECQBAABghSAJAAAAKwRJAAAAWCFIAgAAwApBEgAAAFYIkgAAALBCkAQAAIAVgiQAAACsECQBAABghSAJAAAAKwRJAAAAWCFIAgAAwApBEgAAAFYIkgAAALBCkAQAAIAVgiQAAACsECQBAABghSAJAAAAKwRJAAAAWCFIAgAAwApBEgAAAFYIkgAAALBCkAQAAIAVgiQAAACsECQBAABghSAJAAAAKwRJAAAAWCFIAgAAwApBEgAAAFYIkgAAALBCkAQAAIAVgiQAAACsECQBAABghSAJAAAAKwRJAAAAWCFIAgAAwApBEgAAAFYIkgAAALBCkAQAAIAVgiQAAACsECQBAABghSAJAAAAKwRJAAAAWCFIAgAAwApBEgAAAN4Jkhs3bpS2bdtKwYIFJVWqVLJ48WLPHBkAAACSV5AMDw+XypUry5QpUzxzRAAAAEgS0sb3Ca1btzYLAAAAUrZ4B8n4un79ulmcLl26ZH6uX79eTp065em3Rwp18eJFKVKkiBQqVEgqVark68NBMpUvXz7zc/Xq1XLgwAFfHw6SqW3btknjxo1l/fHjcig42NeHg2TqcEiI1fNSORwOh+2bah/JRYsWSYcOHeLcZ8SIETJy5MgY6+fOnSuZM2e2fWsAAAAkkCtXrsjjjz8uoaGhki1btsRTIzl06FAZNGhQlBpJrSnacvSMFAso7+m3Rwq1d8tP0r5uDQkLC5OAgABfHw6SKa2JLFeuHOUMHkU5gzfs2rXL6nkeD5IZMmQwS3T5ihST4hUqe/rtkUKdOnrY/NQP3Ro1avj6cJBMOZuzKWfwJMoZvEEvVGwwjyQAAAC8UyOpifXQoUOux0eOHJHdu3dLzpw5pWjRonZHAQAAgOQfJLdv325Gjzk5+z/26NFDZs2albBHBwAAgOQTJBs1aiT3MNAbAAAAyQR9JAEAAGCFIAkAAAArBEkAAABYIUgCAADACkESAAAAVgiSAAAAsEKQBAAAgBWCJAAAAKwQJAEAAGCFIAkAAAArBEkAAABYIUgCAADACkESAAAAVgiSAAAAsEKQBAAAgBWCJAAAAKwQJAEAAGCFIAkAAAArBEkAAABYIUgCAADACkESAAAAVgiSAAAAsEKQBAAAgBWCJAAAAKwQJAEAAGCFIAkAAAArBEkAAABYIUgCAADACkESAAAAVgiSAAAAsEKQBAAAgBWCJAAAAKwQJAEAAGCFIAkAAAArBEkAAABYIUgCAADACkESAAAAVgiSAAAAsEKQBAAAgBWCJAAAAKwQJAEAAGCFIAkAAAArBEkAAABYIUgCAADACkESAAAAVgiSAAAAsEKQBAAAgBWCJAAAAKwQJAEAAGCFIAkAAAArBEkAAABYIUgCAADACkESAAAAVgiSAAAAsEKQBAAAgBWCJAAAAKwQJAEAAGCFIAkAAAArBEkAAABYIUgCAADASlq7pyG+tvywXBZOmyS3bkVKxPXrkjNvfkmXIYNcDDxrth/d/7sULVNWUqdJLZmyZJXRcxbL1bAwebJBFXmwdTvp+9YE12vt/eVneevpJ6RgiZJy65ZDMmTMKL1fe1NCLwTJvPffMfuEBJ2XW5GRkjNffvO4fZ/npUHbh3x09gAAIDkiSHrBxcBzMm3YYBm3cKXkLVTYrPtr369Sovz9kipVKvP44bIFZfScRZIlm7/reT99v0RKlq8kv6z6Xnq/NkoyZcni2qYh8t3Fq82/l8+eIVNeHySTvtsgNZu0NOvmfzBewi9fMgETAADAE2ja9oKQ4POSOk0a8fPP7lpXskIlV4iMy5oF86TDU32lfI3aJlTGpVKd+nL+9MkEPWYAAIA7oUbSC4oFlJey1WrJs01rSfmaD0hA1RpS/z8dJVe+AnE+58ShPyXo7GmpUq+R3Iq8KYs+mizNHnk81n03r1wmdf+vvQfPAAAAICZqJL0gderUMuSDT+StL5dI1XqNZf/ObTLwP43lzLEjcT5nzYIvpVH7RyRNmjRSrUFTOXfqhJw8fNC1/fSRv+SlDs2kT73K8t3nn8jDzwzw0tkgqXA4HDJs2DApUKCAZMqUSZo1ayYHD/5bhmIzdepUqVSpkmTLls0sderUke+//961/cKFC9K/f38JCAgwr1m0aFEZMGCAhIaGeuGMkFzK2YgRI0yLjPtStmxZ13bKGRKinBUvXjxGOdOlb9++ZvvRo0dj3a7L119/7aUzS/oIkl5UuGQZafFYN3l1yky5r3I12bb2h1j3u3njhmz4doGsX/y1PNuklvRt8aBEXL0qaxbMjdFHcvq67VK7eWuZOLif+R8NcBo3bpy8//77Mm3aNPnll18kS5Ys0rJlS7l27VqczylcuLCMHTtWduzYIdu3b5cmTZpI+/btZd++fWb76dOnzTJ+/HjZu3evzJo1S1asWCF9+vTx4pkhqZczVaFCBTlz5oxr2bRpk2sb5QwJUc62bdsWpYytWrXKrH/00UfNzyJFikTZrsvIkSMla9as0rp1a6+dW1JH07YXBJ87I+dPnTDN2yosNEQCTx6X/EWLxbq/Bsx8RYrJ2PnLXOu0NnJY94el66DXouybNl06M6CmX6v6snX1ChMqAb2omDhxorzxxhsmCKrPP/9c8uXLJ4sXL5bHHnss1ue1bds2yuO33nrL1FJu2bLFfPFXrFhRFi5c6NpeqlQps88TTzwhN2/elLRp+UhJSWzLmdKykj//37NKREc5Q0KUszx58kR5rBfJWpYaNmxoHmuLX/QyuGjRIunUqZMJk7g71Eh6QeTNSPlqynvSr2U90xz9xhMdpVGHTlKraatY91+z8Etp8J+OUdYVLlXGTOWzfd3fV1TuMmTKLI8PfEXmT36XWkkYR44ckbNnz5rmHyd/f3+pXbu2bN68+a5eIzIyUubNmyfh4eGmiTsu2tyozeB8uac891LOtFmyYMGCUrJkSenatascP378tvtTzlKuhPg8i4iIkNmzZ0vv3r3jHOiqLTG7d++m5jue+D/SC3TKn2GffnnbfRbuP+369xsfzY51n/Hf/NsU7pz6x6lxx05mcerc/+V7OGIkdfqhq/SK3Z0+dm6Ly2+//WaCozYZ6VW5XqGXL18+1n2DgoJk1KhR8vTTTyfg0SO5lzMNANpcrX0gnc2J9evXN83Yfn5+MfannKVs9/J55qQ1lyEhIdKzZ8849/n000+lXLly8uCDD97jEacs1EgCycCcOXNM6HMuN27csH4t/XLXq3Lth/Tcc89Jjx495Pfff4+x36VLl6RNmzYmZOrgCSR/CVXOtP+Z9lPTgV3az2358uXmS/6rr76KsS/lLOVJyM8z95Co5U5rwWNz9epVmTt3LrWRFqiRBJKBdu3amVoep+vXr5uf586dM6McnfRxlSpVbvta6dOnl9KlS5t/V69e3XRYnzRpkkyfPt21z+XLl6VVq1am9khrLNOlS+eBs0JyLmfusmfPLvfdd58cOnQoynrKWcqU0OXs2LFjsnr1avnmm2/i3GfBggVy5coV6d69+z0ff0pDkPSx00f/ksmvviCXLl6UzH5+0m/MRClaJiDGfoEnT8jkoQPlyB97JW/hIlGattcunCffffGJ63Hw2TNmvsohH3zqtfOAb+kXrXuToPaV1U7ka9ascX3Qas2Os5YxPm7duuX6IHe+jtYiZciQQb799lvJmDFjAp4JUmI5CwsLk8OHD0u3bt1c6yhnKVdCl7OZM2dK3rx5Tc327WosNcBGH6CDO6Np28emD39FmnV6Qiav3CQdn+xrwmJsMmXNKl0GDpGB46fE2Nbk4cdMsHQu2fPklfr/4b7aKZl2Jh84cKCMHj3afAlrv0e90tZmnQ4dOrj2a9q0qUyePNn1eOjQobJx40Yzv5o+Rx+vX7/eDIZwfni3aNHCDMDRD159rH2UdNHBOUhZbMvZyy+/LBs2bDDl7Oeff5aOHTuaEbRdunQx2ylnSIhy5rwQ1iCpXXTiGqilNeH6uffkk096/FySI2okfSg0OEgO793jGojzQMs28sno181E5QWKlYiyr1/2HFKuem3Z+8vPt33NP/fsNK9bs0kLjx47Er8hQ4aYL2IdoKD9z+rVq2fm4nOv2dFaIB3I4BQYGGg+oHUAhI6K1D5sK1eulObNm5vtO3fuNLUAytn87T6yUicARspiU85OnjxpQmNwcLCpAdLn6BRTztogyhkSopwpbdLWGQF0tHZcZsyYYebQ1YsXxB9B0oeCzpyWHHnySpp/rpL0qit3gUISdOZUjCB5t/SOOA3bPWLml0TKpuXpzTffNEtctEbIndb+3E6jRo2YYgr3XM50WqnboZwhIcqZ0nB4p7L09ttvmwV2aNpORq5duSI/LV8iTR/5u3kIAADAk6iR9DK97eHSWX+Pfq3XpoNcPB8okTdvmlpJvWrS2kitlbTx84qlUqR0gBQpfV8CHzUAAEBMBEkva9ThUbM47dq4TjZ8u1CaPNRZtqz8TnLlK2DdrL124ZfS9JG4b0kGAACQkGja9rFnRv5PVs2fbW6fuOjjydL37fdc2z584yXZtnal+ff1q1fkqYbV5d2BT5v7buu/Z7/7b5+OU38dkiN/7JO6rf++DykAAICnUSPpY4VKlpYx85fGuu350e9GuZ/2xxt23PZ15uw86JFjBAAAiA01kgAAALBCkAQAAIAVgiQAAACsECQBAABghSAJAAAAKwRJAAAAWCFIAgAAwApBEgAAAFYIkgAAALBCkAQAAIAVgiQAAACsECQBAABghSAJAAAAKwRJAAAAWCFIAgAAwApBEgAAAFYIkgAAALBCkAQAAIAVgiQAAACsECQBAABghSAJAAAAKwRJAAAAWCFIAgAAwApBEgAAAFYIkgAAALBCkAQAAIAVgiQAAACsECQBAABghSAJAAAAKwRJAAAAWCFIAgAAwApBEgAAAFYIkgAAALBCkAQAAIAVgiQAAACsECQBAABghSAJAAAAKwRJAAAAWCFIAgAAwApBEgAAAFYIkgAAALBCkAQAAIAVgiQAAACsECQBAABghSAJAAAAKwRJAAAAWCFIAgAAwApBEgAAAFYIkgAAALBCkAQAAIAVgiQAAACsECQBAABghSAJAAAAKwRJAAAAWCFIAgAAwApBEgAAAFYIkgAAALBCkAQAAIAVgiQAAACsECQBAABghSAJAAAAKwRJAAAAWCFIAgAAwHtBcsqUKVK8eHHJmDGj1K5dW7Zu3Wr37gAAAEg5QXL+/PkyaNAgGT58uOzcuVMqV64sLVu2lMDAQM8cIQAAAJJHkJwwYYI89dRT0qtXLylfvrxMmzZNMmfOLDNmzPDMEQIAACBRShufnSMiImTHjh0ydOhQ17rUqVNLs2bNZPPmzbE+5/r162ZxCg0NNT+PHzxgf9TAHZw7dkyuFMgpu3btkrCwMF8fDpKpgwcPSkBAAOUMHkU5gzf8+uuv5qfD4fBckAwKCpLIyEjJly9flPX6eP/+/bE+Z8yYMTJy5MgY66e/+Xq8DhSIrznv+/oIAABIWoKDg8Xf398zQdKG1l5qn0qnkJAQKVasmBw/fjxeBwrEx6VLl6RIkSJy4sQJyZYtm68PB8kU5QzeQDmDN2iLcdGiRSVnzpzxel68gmTu3LklTZo0cu7cuSjr9XH+/PljfU6GDBnMEp2GSP6HgKdpGaOcwdMoZ/AGyhm8Qbssxmv/+OycPn16qV69uqxZs8a17tatW+ZxnTp14vXGAAAASNri3bStzdQ9evSQGjVqSK1atWTixIkSHh5uRnEDAAAg5Yh3kOzcubOcP39ehg0bJmfPnpUqVarIihUrYgzAiYs2c+sclLE1dwMJhXIGb6CcwRsoZ0jM5SyVI77jvAEAAADutQ0AAABbBEkAAABYIUgCAADACkESAAAAiT9ITpkyRYoXLy4ZM2aU2rVry9atW7359kgBNm7cKG3btpWCBQtKqlSpZPHixb4+JCQzetvXmjVrip+fn+TNm1c6dOggBw4c8PVhIZmZOnWqVKpUyTUJuc7V/P333/v6sJDMjR071nx3Dhw4MPEFyfnz55s5KHVo+c6dO6Vy5crSsmVLCQwM9NYhIAXQOU21bOlFC+AJGzZskL59+8qWLVtk1apVcuPGDWnRooUpe0BCKVy4sPlS37Fjh2zfvl2aNGki7du3l3379vn60JBMbdu2TaZPn24uYBLl9D9aA6lX8ZMnT3bdEUfvHdq/f3959dVXvXEISGH0qmrRokWmxgjwFJ1XV2smNWA2aNDA14eDZEzvgfzOO+9Inz59fH0oSGbCwsKkWrVq8uGHH8ro0aPNHOF6w5lEUyMZERFhrqqaNWv27xunTm0eb9682RuHAAAeERoa6vqSBzwhMjJS5s2bZ2q9uR0xPEFbWdq0aRMlp3nszjY2goKCzP8I0e9+o4/379/vjUMAgASnLSval6hu3bpSsWJFXx8OkpnffvvNBMdr165J1qxZTQtL+fLlfX1YSGbmzZtnuhxq07YNrwRJAEiuV/F79+6VTZs2+fpQkAwFBATI7t27Ta33ggULpEePHqYLBWESCeXEiRPywgsvmP7eOhA60QbJ3LlzS5o0aeTcuXNR1uvj/Pnze+MQACBB9evXT5YtW2ZmCtCBEUBCS58+vZQuXdr8u3r16qbGaNKkSWZABJAQtNuhDnrW/pFO2oKsn2s6puX69esmv/m8j6T+z6D/E6xZsyZKk5A+pr8HgKRExydqiNRmxrVr10qJEiV8fUhIIfR7U7/YgYTStGlT04VCa76dS40aNaRr167m33cKkV5t2tapf7RaXg+wVq1aZjSQdhzu1auXtw4BKWTk2aFDh1yPjxw5Yv5n0IEQRYsW9emxIfk0Z8+dO1eWLFli5pI8e/asWe/v7y+ZMmXy9eEhmRg6dKi0bt3afG5dvnzZlLn169fLypUrfX1oSEb8/Pxi9O/OkiWL5MqV6677fXstSHbu3NlMkzFs2DDzwatDy1esWBFjAA5wL3S+tcaNG0e5gFF6ETNr1iwfHhmS00TRqlGjRlHWz5w5U3r27Omjo0Jyo82N3bt3lzNnzpiLFJ3bT0Nk8+bNfX1ogG/mkQQAAEDywr22AQAAYIUgCQAAACsESQAAAFghSAIAAMAKQRIAAABWCJIAAACwQpAEAACAFYIkAAAArBAkAQAAYIUgCQAAACsESQAAAFghSAIAAEBs/D8uReQMIOUvYwAAAABJRU5ErkJggg==",
      "text/plain": [
       "<Figure size 800x600 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import random\n",
    "from collections import defaultdict\n",
    "\n",
    "class GridWorld:\n",
    "    def __init__(self):\n",
    "        self.action_space = [0, 1, 2, 3]\n",
    "        self.action_meaning = {0: 'up', 1: 'right', 2: 'down', 3: 'left'}\n",
    "        self.reward_map=np.array(\n",
    "            [[0,0,0,1.0],\n",
    "             [0,None,0,-1.0],\n",
    "             [0,0,0,0]]\n",
    "        )\n",
    "        self.goal_state=(0,3)\n",
    "        self.wall_state=(1,1)\n",
    "        self.start_state=(2,0)\n",
    "        self.agent_state=self.start_state\n",
    "    \n",
    "    @property\n",
    "    def height(self):\n",
    "        return self.reward_map.shape[0]\n",
    "    \n",
    "    @property\n",
    "    def width(self):\n",
    "        return self.reward_map.shape[1]\n",
    "\n",
    "    @property\n",
    "    def shape(self):\n",
    "        return self.reward_map.shape\n",
    "\n",
    "    @property\n",
    "    def actions(self):\n",
    "        return self.action_space\n",
    "\n",
    "    @property\n",
    "    def states(self):\n",
    "        for i in range(self.height):\n",
    "            for j in range(self.width):\n",
    "                if (i, j) != self.wall_state:  # 排除墙壁状态\n",
    "                    yield (i, j)\n",
    "\n",
    "    def next_state(self, state, action):\n",
    "        # 根据当前状态和动作计算下一个状态的位置\n",
    "        action_move_map = [(-1, 0), (0, 1), (1, 0), (0, -1)]\n",
    "        move=action_move_map[action]\n",
    "        next_state=(state[0]+move[0],state[1]+move[1])\n",
    "        ny,nx=next_state\n",
    "        if ny<0 or ny>=self.height or nx<0 or nx>=self.width:\n",
    "            next_state=state\n",
    "        elif (ny,nx)==self.wall_state:\n",
    "            next_state=state\n",
    "        return next_state\n",
    "    \n",
    "    def reward(self, state, action, next_state):\n",
    "        return self.reward_map[next_state]\n",
    "    \n",
    "    def is_terminal(self, state):\n",
    "        return state == self.goal_state or state == (1, 3)  # 目标状态或负奖励状态\n",
    "    \n",
    "    def reset(self):\n",
    "        self.agent_state = self.start_state\n",
    "        return self.agent_state\n",
    "    \n",
    "    def step(self, action):\n",
    "        current_state = self.agent_state\n",
    "        next_state = self.next_state(current_state, action)\n",
    "        reward = self.reward(current_state, action, next_state)\n",
    "        self.agent_state = next_state\n",
    "        done = self.is_terminal(next_state)\n",
    "        return next_state, reward, done\n",
    "    \n",
    "    def render_v(self, value_function=None):\n",
    "        \"\"\"可视化价值函数\"\"\"\n",
    "        if value_function is None:\n",
    "            value_function = {}\n",
    "        \n",
    "        fig, ax = plt.subplots(figsize=(8, 6))\n",
    "        \n",
    "        # 创建网格显示\n",
    "        for i in range(self.height):\n",
    "            for j in range(self.width):\n",
    "                if (i, j) == self.wall_state:\n",
    "                    # 墙壁用黑色表示\n",
    "                    ax.add_patch(plt.Rectangle((j, self.height-1-i), 1, 1, \n",
    "                                             facecolor='black', edgecolor='white'))\n",
    "                    ax.text(j+0.5, self.height-1-i+0.5, 'WALL', \n",
    "                           ha='center', va='center', color='white', fontsize=8)\n",
    "                elif (i, j) == self.goal_state:\n",
    "                    # 目标状态用绿色表示\n",
    "                    ax.add_patch(plt.Rectangle((j, self.height-1-i), 1, 1, \n",
    "                                             facecolor='lightgreen', edgecolor='black'))\n",
    "                    value = value_function.get((i, j), 0)\n",
    "                    ax.text(j+0.5, self.height-1-i+0.5, f'GOAL\\n{value:.2f}', \n",
    "                           ha='center', va='center', fontsize=8)\n",
    "                elif (i, j) == (1, 3):  # 负奖励状态\n",
    "                    ax.add_patch(plt.Rectangle((j, self.height-1-i), 1, 1, \n",
    "                                             facecolor='lightcoral', edgecolor='black'))\n",
    "                    value = value_function.get((i, j), 0)\n",
    "                    ax.text(j+0.5, self.height-1-i+0.5, f'TRAP\\n{value:.2f}', \n",
    "                           ha='center', va='center', fontsize=8)\n",
    "                elif (i, j) == self.start_state:\n",
    "                    # 起始状态用蓝色表示\n",
    "                    ax.add_patch(plt.Rectangle((j, self.height-1-i), 1, 1, \n",
    "                                             facecolor='lightblue', edgecolor='black'))\n",
    "                    value = value_function.get((i, j), 0)\n",
    "                    ax.text(j+0.5, self.height-1-i+0.5, f'START\\n{value:.2f}', \n",
    "                           ha='center', va='center', fontsize=8)\n",
    "                else:\n",
    "                    # 普通状态用白色表示\n",
    "                    ax.add_patch(plt.Rectangle((j, self.height-1-i), 1, 1, \n",
    "                                             facecolor='white', edgecolor='black'))\n",
    "                    value = value_function.get((i, j), 0)\n",
    "                    ax.text(j+0.5, self.height-1-i+0.5, f'{value:.2f}', \n",
    "                           ha='center', va='center', fontsize=10)\n",
    "        \n",
    "        ax.set_xlim(0, self.width)\n",
    "        ax.set_ylim(0, self.height)\n",
    "        ax.set_aspect('equal')\n",
    "        ax.set_title('Grid World Value Function')\n",
    "        ax.set_xticks(range(self.width+1))\n",
    "        ax.set_yticks(range(self.height+1))\n",
    "        plt.grid(True)\n",
    "        plt.show()\n",
    "\n",
    "# 蒙特卡洛方法实现\n",
    "def random_policy(env, state):\n",
    "    \"\"\"随机策略\"\"\"\n",
    "    return random.choice(env.actions)\n",
    "\n",
    "def epsilon_greedy_policy(env, state, Q, epsilon=0.1):\n",
    "    \"\"\"ε-贪婪策略\"\"\"\n",
    "    if random.random() < epsilon:\n",
    "        return random.choice(env.actions)\n",
    "    else:\n",
    "        # 选择Q值最大的动作\n",
    "        q_values = [Q.get((state, action), 0) for action in env.actions]\n",
    "        max_q = max(q_values)\n",
    "        best_actions = [action for action, q in zip(env.actions, q_values) if q == max_q]\n",
    "        return random.choice(best_actions)\n",
    "\n",
    "def generate_episode(env, policy, Q=None, epsilon=0.1):\n",
    "    \"\"\"生成一个episode\"\"\"\n",
    "    episode = []\n",
    "    state = env.reset()\n",
    "    \n",
    "    while True:\n",
    "        if Q is not None:\n",
    "            action = epsilon_greedy_policy(env, state, Q, epsilon)\n",
    "        else:\n",
    "            action = policy(env, state)\n",
    "        \n",
    "        next_state, reward, done = env.step(action)\n",
    "        episode.append((state, action, reward))\n",
    "        \n",
    "        if done:\n",
    "            break\n",
    "        state = next_state\n",
    "    \n",
    "    return episode\n",
    "\n",
    "def monte_carlo_first_visit(env, num_episodes=10000, gamma=0.9):\n",
    "    \"\"\"首次访问蒙特卡洛方法\"\"\"\n",
    "    returns = defaultdict(list)\n",
    "    value_function = defaultdict(float)\n",
    "    \n",
    "    for episode_num in range(num_episodes):\n",
    "        # 生成episode\n",
    "        episode = generate_episode(env, random_policy)\n",
    "        \n",
    "        # 计算每个状态的回报\n",
    "        G = 0\n",
    "        visited_states = set()\n",
    "        \n",
    "        # 从后往前计算回报\n",
    "        for t in reversed(range(len(episode))):\n",
    "            state, action, reward = episode[t]\n",
    "            G = gamma * G + reward\n",
    "            \n",
    "            # 首次访问\n",
    "            if state not in visited_states:\n",
    "                visited_states.add(state)\n",
    "                returns[state].append(G)\n",
    "                value_function[state] = np.mean(returns[state])\n",
    "        \n",
    "        # 每1000个episode打印一次进度\n",
    "        if (episode_num + 1) % 1000 == 0:\n",
    "            print(f\"Episode {episode_num + 1}/{num_episodes} completed\")\n",
    "    \n",
    "    return dict(value_function)\n",
    "\n",
    "def monte_carlo_control(env, num_episodes=10000, gamma=0.9, epsilon=0.1):\n",
    "    \"\"\"蒙特卡洛控制方法\"\"\"\n",
    "    Q = defaultdict(float)\n",
    "    returns = defaultdict(list)\n",
    "    \n",
    "    for episode_num in range(num_episodes):\n",
    "        # 生成episode\n",
    "        episode = generate_episode(env, None, Q, epsilon)\n",
    "        \n",
    "        # 计算每个状态-动作对的回报\n",
    "        G = 0\n",
    "        visited_pairs = set()\n",
    "        \n",
    "        # 从后往前计算回报\n",
    "        for t in reversed(range(len(episode))):\n",
    "            state, action, reward = episode[t]\n",
    "            G = gamma * G + reward\n",
    "            \n",
    "            # 首次访问\n",
    "            state_action = (state, action)\n",
    "            if state_action not in visited_pairs:\n",
    "                visited_pairs.add(state_action)\n",
    "                returns[state_action].append(G)\n",
    "                Q[state_action] = np.mean(returns[state_action])\n",
    "        \n",
    "        # 每1000个episode打印一次进度\n",
    "        if (episode_num + 1) % 1000 == 0:\n",
    "            print(f\"Episode {episode_num + 1}/{num_episodes} completed\")\n",
    "    \n",
    "    # 从Q函数计算价值函数\n",
    "    value_function = {}\n",
    "    for state in env.states:\n",
    "        q_values = [Q.get((state, action), 0) for action in env.actions]\n",
    "        value_function[state] = max(q_values) if q_values else 0\n",
    "    \n",
    "    return dict(value_function), dict(Q)\n",
    "\n",
    "# 测试代码\n",
    "env = GridWorld()\n",
    "print(\"Grid World Environment:\")\n",
    "print(f\"Shape: {env.shape}\")\n",
    "print(f\"Actions: {env.actions}\")\n",
    "print(f\"Start state: {env.start_state}\")\n",
    "print(f\"Goal state: {env.goal_state}\")\n",
    "print(f\"Wall state: {env.wall_state}\")\n",
    "\n",
    "# 使用蒙特卡洛方法计算价值函数\n",
    "print(\"\\nRunning Monte Carlo First Visit...\")\n",
    "value_function = monte_carlo_first_visit(env, num_episodes=5000)\n",
    "\n",
    "print(\"\\nValue Function:\")\n",
    "for state in sorted(value_function.keys()):\n",
    "    print(f\"State {state}: {value_function[state]:.3f}\")\n",
    "\n",
    "# 可视化价值函数\n",
    "env.render_v(value_function)\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
